package freebase;

import zclTool.*;

import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.*;

// Filters out the infobox rows whose article is not in Freebase.
/**
 * Command-line tool that filters a delimited infobox dump down to the rows
 * whose article id appears in a separate id list.
 *
 * <p>Arguments:
 * <ol>
 *   <li>{@code args[0]} - delimited file whose first column is an integer id;</li>
 *   <li>{@code args[1]} - delimited infobox file; its second column (index 1)
 *       is parsed as the integer id to look up;</li>
 *   <li>{@code args[2]} - output file, written as UTF-8.</li>
 * </ol>
 *
 * <p>For every matching infobox row, columns 1, 4, 5 and 6 are written
 * tab-separated on one line.
 */
public class MyInfobox {
	/**
	 * Entry point; see the class description for the meaning of the arguments.
	 *
	 * @param args id file, infobox file, output file (in that order)
	 * @throws IllegalArgumentException if fewer than three arguments are given
	 * @throws Exception on any I/O or number-parsing failure
	 */
	public static void main(String []args)throws Exception{
		if(args.length < 3){
			throw new IllegalArgumentException(
					"usage: MyInfobox <idFile> <infoboxFile> <outputFile>");
		}

		Set<Integer> rphidSet = readIds(args[0]);

		//attributeId, articleId, textId, templateId, templateName, attributeName (index 5),
		//attributeValueWiki, attributeValueHtml, attributeValueText, attributesNamesId
		DelimitedReader dr = new DelimitedReader(args[1]);
		try{
			OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(args[2]),"utf-8");
			try{
				String[] line;
				while((line = dr.read())!=null){
					int rphid = Integer.parseInt(line[1]);
					if(rphidSet.contains(rphid)){
						osw.write(line[1]+"\t"+line[4]+"\t"+line[5]+"\t"+line[6]+"\n");
					}
				}
			}finally{
				// The writer buffers encoded output; without close() the tail of
				// the file (or all of it) would never reach disk.
				osw.close();
			}
		}finally{
			dr.close();
		}
	}

	/** Reads the first column of every row in {@code path} as an integer id. */
	private static Set<Integer> readIds(String path)throws Exception{
		Set<Integer> ids = new HashSet<Integer>();
		DelimitedReader dr = new DelimitedReader(path);
		try{
			String[] line;
			while((line = dr.read())!=null){
				ids.add(Integer.parseInt(line[0]));
			}
		}finally{
			dr.close();
		}
		return ids;
	}
}
